In [ ]:
# from IPython.display import HTML
# HTML('''<script>
# code_show=true;
# function code_toggle() {
# if (code_show){
# $('div.input').hide();
# } else {
# $('div.input').show();
# }
# code_show = !code_show
# }
# $( document ).ready(code_toggle);
# </script>
# <form action="javascript:code_toggle()"><input type="submit" value="Toggle ON/OFF raw code cells."></form>''')
Let us now assemble all steps above in a pipeline, to be run for all and each image in the CS82 archive.
For each image, the pipeline should:
FLAGS
>0ALPHA_J2000
,DELTA_J2000
)FLAGS
histogram{MAG,MAGERR}_{AUTO,MODEL,PSF,APER,SPHEROID,HYBRID,BEST,PETRO,ISO,ISOCOR}
mag_type | SNR | MAG_ERROR | MAG_LIMIT_CURVEFIT | MAG_LIMIT_BOXPLOT |
---|---|---|---|---|
AUTO | 5 | 0.2 | 24.44 | 24.59 |
AUTO | 2 | 0.5 | 25.69 | None |
BEST | 5 | 0.2 | 24.47 | 24.6 |
BEST | 2 | 0.5 | 25.71 | None |
PSF | 5 | 0.2 | 24.95 | 24.82* |
PSF | 2 | 0.5 | 25.99 | None |
... |
This table should be named ${tilename}_magLimits.csv
In [ ]:
import matplotlib
# matplotlib.use('Agg')
# import seaborn
# seaborn.set()
import astropy
import pandas
import numpy
import scipy
In [ ]:
class Plots:
'''
Manage data source and plot creation
'''
@staticmethod
def scatter(x, y, vlines=None, fit_func=None, title=None, savefig=False):
import pandas
df = pandas.DataFrame({x.name:x, y.name:y})
def scatter_mags(x,y,df):
from matplotlib import pyplot as plt
plt.scatter(df[x.name], df[y.name], marker='.', alpha=0.1, lw=None)
plt.xlabel(x.name)
plt.ylabel(y.name)
plt.title(title)
return plt
plt = scatter_mags(x,y,df)
plt.hold(True)
def fit_curve(x,y,fit_func):
import numpy as np
from scipy.optimize import curve_fit
popt,pcov = curve_fit(fit_func, x, y)
x = np.linspace(x.min(),x.max(),100)
y = fit_func(x,*popt)
return x,y,popt
if fit_func != None:
x_fit, y_fit, popt = fit_curve( df[x.name], df[y.name], fit_func )
plt.plot( x_fit, y_fit, linestyle='solid', color='red', lw=1 )
vlines_intersec = None
if vlines:
vlines_intersec = []
for x_line in vlines:
y_val = fit_func(x_line,*popt)
vlines_intersec.append(y_val)
plt.axvline(x_line, color='red', linestyle='dotted')
_lbl = ' {:.2f}, {:.2f} '.format(x_line,y_val)
plt.text(x_line+0.01, y_val-5, _lbl, bbox=dict(facecolor='white', alpha=0.9))
del df
plt.hold(False)
fig = plt.gcf()
return fig,vlines_intersec
@staticmethod
def boxplot(column, by, hlines, nbins=20, fit_func=None, savefig=False):
'''
Boxplot distributions of 'column' splitted by column 'by' in 'nbins'
'hlines' is a list of values within 'column' limits, where 'column'
should be annotated.
'''
import pandas
df = pandas.DataFrame({column.name:column, by.name:by})
bin_categories,bins = pandas.qcut(df[by.name], nbins, retbins=True, precision=2)
bincol = '_'+by.name+'_'
df[bincol] = bin_categories
from matplotlib import pyplot as plt
plt.hold(True)
ax = df.boxplot(column.name,by=bincol, rot=90, sym='*')
# ---
def fit_linear_curve(x,y,func):
from scipy.optimize import curve_fit
popt,pcov = curve_fit(func, x, y)
return popt
# ---
hlines_intersec = None
if hlines:
hlines_intersec = []
for hline in hlines:
ax.axhline(hline, color='red', ls='dotted')
def _within_quartil(q):
dq = q.quantile(0.75)-q.quantile(0.25)
mq = q.quantile(0.5)
return mq-1.5*dq < hline and mq+1.5*dq > hline
filt_df = df.groupby(bincol).filter( lambda df,col=column:_within_quartil(df[col.name]) )
y_val = None
h_lbl = ' {} : {} '.format(hline,y_val)
if len(filt_df) > 0:
x = filt_df[column.name]
y = filt_df[by.name]
if fit_func is None:
y_val = y.mean()
else:
popt = fit_linear_curve(x,y,fit_func)
y_val = fit_func(hline,*popt)
h_lbl = ' {:.2f} : {:.2f} '.format(hline,y_val)
ax.text(1, hline+hline*0.1, h_lbl, bbox=dict(facecolor='white', alpha=0.9))
hlines_intersec.append(y_val)
del df
plt.hold(False)
fig = ax.figure
return fig,hlines_intersec
@staticmethod
def footprint(x, y, title, savefig=False):
from matplotlib import pyplot as plt
fig, ax = plt.subplots()
fig.set_size_inches(8,8)
_ = ax.scatter(x=x, y=y, marker='.')
_ = ax.set_xlabel('RA (deg)')
_ = ax.set_ylabel('Dec (deg)')
_title = 'Footprint: {}'.format(tilename)
_ = ax.set_title(_title)
return fig
@staticmethod
def histogram(column,savefig=False):
import seaborn
ax = seaborn.countplot(column)
fig = ax.figure
return fig
class CS82(object):
'''
Holds methods to extract and produce plots to CS82 image
'''
def __init__(self,tilename):
self._catalog = self.open_file(tilename)
self._tilename = tilename
self._columns_radec = self.read_position_columns()
self._columns_mag = self.read_mag_columns()
self._columns_flags = self.read_flags_columns()
self.init_plots()
self._table = None
self._magtype = None
def add_magLims(self,df):
if self._table is None:
self._table = df
else:
self._table = pandas.concat([self._table,df])
@property
def magLims(self):
return self._table
@property
def filename(self):
return self._tilename
@property
def ra(self):
return self._catalog['ALPHA_J2000']
@property
def dec(self):
return self._catalog['DELTA_J2000']
@property
def flags(self):
return self._catalog['FLAGS']
@property
def mag(self):
assert self._magtype
col = 'MAG_'+self._magtype
assert col in self._catalog.columns
return self._catalog[col]
@property
def magerr(self):
assert self._magtype
col = 'MAGERR_'+self._magtype
assert col in self._catalog.columns
return self._catalog[col]
def set_magtype(self,mtype):
self._magtype = mtype
@staticmethod
def open_file(tilename):
from astropy.table import Table
cat = Table.read(tilename,format='fits',hdu=2)
return cat
def init_plots(self):
_columns = self._columns_flags[:]
_columns.extend(self._columns_radec)
_columns.extend([ 'MAG_{}'.format(c) for c in self._columns_mag ])
_columns.extend([ 'MAGERR_{}'.format(c) for c in self._columns_mag ])
cat = self._catalog[_columns]
df = cat.to_pandas()
for col in _columns:
nil_indxs = df[col]==99
df.loc[nil_indxs,col] = None
df.dropna(inplace=True)
self._catalog = df
def read_position_columns(self):
return ['ALPHA_J2000','DELTA_J2000']
def read_mag_columns(self):
colnames = filter(lambda s:'MAG_' in s, self._catalog.colnames)
colnames = filter(lambda c:self._catalog[c].ndim == 1, colnames)
mag_types = [ s[4:] for s in colnames ]
return mag_types
def read_flags_columns(self):
return ['FLAGS']
In [ ]:
import glob
# tiles_list = ['S82m0m_y.V2.7A.swarp.cut.deV.fit',
# 'S82m1m_y.V2.7A.swarp.cut.deV.fit',
# 'S82m10m_y.V2.7A.swarp.cut.deV.fit']
tiles_list = glob.glob('S82*.V2.7A.swarp.cut.deV.fit')
mags_list = ['AUTO','BEST','MODEL','PSF']
def _rootname_(tilename):
from os.path import basename,splitext
# rootname = splitext(basename(tilename))[0]
rootname = splitext(tilename)[0]
return rootname
def _savefig_filename_(tilename,plot_type):
assert isinstance(plot_type,(str,unicode))
rootname = _rootname_(tilename)
filename = '{}_{}.png'.format(rootname,plot_type)
return filename
def _csv_filename_(tilename):
rootname = _rootname_(tilename)
filename = '{}_{}.csv'.format(rootname,'magLims')
return filename
import pandas
df = None
for i,tilename in enumerate(tiles_list):
print "Running MAGS-LIMIT pipeline for tile: {}".format(tilename)
from matplotlib import pyplot as plt
plt.cla()
plt.clf()
tile_rootname = _rootname_(tilename)
import shutil
import os
if os.path.isdir(tile_rootname):
shutil.rmtree(tile_rootname)
os.mkdir(tile_rootname)
# Open the catalog
tile = CS82(tilename)
filename = os.path.join(tile_rootname,tile.filename)
for mag in mags_list:
print " --> processing {} magnitude measurements;".format(mag)
tile.set_magtype(mag)
error_lims = [0.2,0.5]
def func_linear(t, a, b):
return a*t + b
fig,mag_lims_bx = Plots.boxplot(tile.magerr, by=tile.mag, hlines=error_lims,
fit_func=func_linear, nbins=20, savefig=True)
fig.savefig(_savefig_filename_(filename,'boxplot_'+mag))
del fig
del func_linear
plt.cla()
plt.clf()
def func_log(t, a, b):
from numpy import log
return a + b * log(t)
title = '{}: measurement vs error'.format(mag)
fig,mag_lims_sc = Plots.scatter(x=tile.magerr, y=tile.mag, vlines=error_lims,
fit_func=func_log, title=title, savefig=True)
fig.savefig(_savefig_filename_(filename,'scatter_'+mag))
del fig, title
del func_log
_d = {'Error':error_lims , 'MagErr_Fit':mag_lims_sc , 'MagErr_Box':mag_lims_bx}
del error_lims, mag_lims_bx, mag_lims_sc
_df = pandas.DataFrame(_d)
_df['MagType'] = mag
_df.set_index(['MagType','Error'], inplace=True)
tile.add_magLims(_df)
del _df,_d
plt.cla()
plt.clf()
del mag
print " --> done with magnitude plots and depth estimation;"
fig = Plots.histogram(tile.flags, savefig=True)
fig.savefig(_savefig_filename_(filename,'flags'))
del fig
title = 'Footprint: {}'.format(tile.filename)
fig = Plots.footprint(tile.ra,tile.dec,title=title,savefig=True)
fig.savefig(_savefig_filename_(filename,'footprint'))
del fig,title
tile.magLims.to_csv(_csv_filename_(filename))
print " Done with this tile."
df_tile = tile.magLims
index_new = ['Tile'] + list(df_tile.index.names)
df_tile['Tile'] = tilename
df_tile.reset_index(inplace=True)
df_tile.set_index(index_new, inplace=True)
del index_new
del tile,filename,tile_rootname
if df is None:
df = df_tile
else:
df = pandas.concat([df,df_tile])
del df_tile
del tilename
# df.to_csv('MagLims_allTiles.csv')
del df, mags_list, tiles_list
print "Done. {} catalogs processed".format(i+1)
del i
In [24]:
import pandas
df = pandas.read_csv('MagLims_allTiles.csv',index_col=(0,1,2))
In [25]:
# df.sort_index(inplace=True)
# df_magLims = df.loc[(slice(None),slice(None),0.2),:]
# grouped = df_magLims.reset_index()[['MagType','MagErr_Fit']].set_index('MagType').groupby(level=0)
# df_magLimFit = pandas.DataFrame(index=range(len(df_magLims)/4))
# for gname,group in grouped:
# df_magLimFit[gname] = group.values
In [26]:
df.sort_index(inplace=True)
df = df.loc[(slice(None),['AUTO','BEST','PSF','MODEL'],0.2),:].reset_index(level=['MagType'])
idx_invalid = (df['MagErr_Fit'] < 0).values
df.loc[idx_invalid,'MagErr_Fit'] = None
idx_invalid = (df['MagErr_Box'] < 0).values
df.loc[idx_invalid,'MagErr_Box'] = None
df
Out[26]:
In [27]:
import matplotlib.pyplot as plt
In [28]:
import seaborn
ax = seaborn.violinplot(x='MagType',y='MagErr_Fit',data=bla)
fig = ax.figure
from IPython.display import display
display(fig)
plt.cla()
plt.clf()
In [29]:
import seaborn
ax = seaborn.violinplot(x='MagType',y='MagErr_Box',data=bla)
fig = ax.figure
from IPython.display import display
display(fig)
plt.cla()
plt.clf()
In [39]:
for gn,grp in df.groupby('MagType'):
print '-'*5,"\n",gn,"\n",grp.describe(),"\n"
In [ ]: